#
animal_game <- read.csv("recognition_data/animalgame.csv") %>%
as.tibble() %>%
mutate(exp = 'animalgame') %>%
select(-X)
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#
vehicle_game <- read.csv("recognition_data/vehiclegame.csv") %>%
as.tibble() %>%
mutate(exp = 'vehiclegame') %>%
select(-X)
#
biganimal_game <- read.csv("recognition_data/biganimalgame.csv") %>%
as.tibble() %>%
mutate(exp = 'biganimalgame') %>%
select(-X)
object_game <- read.csv("recognition_data/objectgame.csv") %>%
as.tibble() %>%
mutate(exp = 'objectgame') %>%
select(-X)
recog_data <- animal_game %>%
full_join(vehicle_game)%>%
full_join(biganimal_game) %>%
full_join(object_game)
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## make copy for editing
orig_d <- recog_data
d <- recog_data
# make similar levels
d$clicked_category = as.factor(d$clicked_category)
d$intended_category = factor(d$intended_category, levels=levels(d$clicked_category))
# compute accurcy
d <- d %>%
mutate(correct_or_not = (clicked_category == intended_category)) %>%
mutate(recognizer_age_numeric = str_split_fixed(recognizer_age, 'age',2)[,2]) %>%
mutate(recognizer_age_numeric = as.numeric(recognizer_age_numeric))
d$recognizer_age <- factor(d$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))
##Filter out adults, those that didn't get past more than 1 real trial, and trials with RTs that are way too long or short
adults <- d %>%
filter(recognizer_age == 'adult')
didnt_start <- d %>%
group_by(sessionId) %>%
mutate(count_trials = max(trial_num)) %>%
filter(count_trials < 5)
# do actual filtering here
d <- d %>%
filter(!sessionId %in% didnt_start$sessionId) %>%
filter(!sessionId %in% adults$sessionId) %>%
filter(!recognizer_age=='age2') %>%
filter(RT>100 & RT<10000) # super long or super short trial
drawings_per_exp <- d %>%
group_by(exp) %>%
summarize(num_drawings_seen = length(unique(sketch_path)))
## `summarise()` ungrouping output (override with `.groups` argument)
# threshold : 75% correct
threshold=.75
# compute avg correct photo trials for each subject
photo_correct <- d %>%
group_by(sessionId,recognizer_age) %>%
filter(producer_age == "photo") %>%
summarize(avg_photo_correct = mean(correct_or_not))
## `summarise()` regrouping output by 'sessionId' (override with `.groups` argument)
# visualize these data by each age group
ggplot(photo_correct, aes(x=recognizer_age, y=avg_photo_correct, col=recognizer_age)) +
geom_jitter(alpha=.6) +
scale_color_viridis(discrete=TRUE) +
geom_hline(yintercept=threshold)
# make a list of the subjects who don't meet our threshold
bad_subs <- photo_correct %>%
filter(avg_photo_correct < threshold) ## includes subjects who got 75% correct, excludes all those below
# filter bad subs
d <- d %>%
filter(!sessionId %in% bad_subs$sessionId)
# check that we did this right
photo_trials_by_sub <- d %>%
filter(producer_age == 'photo') %>%
group_by(sessionId) %>%
summarize(avg_correct = mean(correct_or_not))
## `summarise()` ungrouping output (override with `.groups` argument)
# make sure this is true.
assert_that(sum(photo_trials_by_sub$avg_correct<threshold)==0)
## [1] TRUE
cor_by_trial_type <- d %>%
mutate(photo_or_not = (producer_age == 'photo')) %>%
group_by(photo_or_not,sessionId) %>%
summarize(count_cor = sum(correct_or_not), count_items = n(), avg_correct = count_cor / count_items)
## `summarise()` regrouping output by 'photo_or_not' (override with `.groups` argument)
only_one_type <- cor_by_trial_type %>%
group_by(sessionId) %>%
summarize(count_ids = n()) %>%
filter(count_ids == 1)
## `summarise()` ungrouping output (override with `.groups` argument)
# filter these subjects
d <- d %>%
filter(!sessionId %in% only_one_type$sessionId)
num_trials_per_kid <- d %>%
# filter(!sessionId %in% adults$sessionId) %>% # exclude adults (prereg code, error)
filter(recognizer_age != "adult") %>% # exclude adults
group_by(sessionId) %>%
summarize(max_trials = max(trial_num)) %>%
summarize(average_trials = mean(max_trials))
## `summarise()` ungrouping output (override with `.groups` argument)
num_kids_per_exp <- d %>%
filter(recognizer_age != "adult") %>% # exclude adults
group_by(exp,recognizer_age) %>%
summarize(num_subs = length(unique(sessionId)))
## `summarise()` regrouping output by 'exp' (override with `.groups` argument)
##
First, we excluded children who started the game but did not complete more than 1 trial after the practice trials (N = 1068 participants) and the 238 adults who participated. We also excluded all trials with RTs slower than 10s or faster than 100ms, judging these to be off-task responses. Next, we excluded participants on the basis of their performance on practice and catch trials; given that these catch trials presented a very easy recognition task, we excluded participants who did not acheive at least 75% accuracy on these trials (N= 795). The remaining 1789 who met this criterion completed an average of 21.69 trials. On total, we analyzed 36615 trials where children recognized each others drawings.
bad_subs_descriptives <- orig_d %>%
filter(sessionId %in% bad_subs$sessionId) %>%
group_by(sessionId) %>%
summarize(count_trials = n(), recognizer_age = recognizer_age[1]) %>%
group_by(recognizer_age) %>%
summarize(count_subs = n(), avg_trials = mean(count_trials))
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
kable(bad_subs_descriptives)
| recognizer_age | count_subs | avg_trials |
|---|---|---|
| age10 | 46 | 19.19565 |
| age3 | 270 | 14.37778 |
| age4 | 180 | 16.18889 |
| age5 | 103 | 21.38835 |
| age6 | 60 | 20.16667 |
| age7 | 50 | 17.58000 |
| age8 | 49 | 17.34694 |
| age9 | 37 | 15.45946 |
d %>%
group_by(recognizer_age) %>%
summarize(num_subs = length(unique(sessionId))) %>%
kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| recognizer_age | num_subs |
|---|---|
| age3 | 329 |
| age4 | 408 |
| age5 | 301 |
| age6 | 227 |
| age7 | 188 |
| age8 | 131 |
| age9 | 84 |
| age10 | 121 |
by_recognizer_photo <- d %>%
group_by(recognizer_age) %>%
filter(producer_age == 'photo') %>%
group_by(sessionId,recognizer_age) %>%
summarize(indiv_photo_correct = mean(correct_or_not)) %>% # average first over individual participants
group_by(recognizer_age) %>%
multi_boot_standard(col = 'indiv_photo_correct')
## `summarise()` regrouping output by 'sessionId' (override with `.groups` argument)
by_recognizer_photo$recognizer_age <- factor(by_recognizer_photo$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))
ggplot(by_recognizer_photo,aes(x=recognizer_age, y=mean, col = recognizer_age)) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
scale_color_viridis(discrete = "TRUE") +
ylab('Proportion recognized - PHOTO') +
ylim(.75,1) + # can't be lower than 75% correct or higher than 1
theme_few()
by_recognizer_age <- d %>%
filter(producer_age != 'photo') %>%
group_by(sessionId, recognizer_age_numeric) %>%
summarize(avg_correct = mean(correct_or_not), num_trials = n()) %>%
filter(num_trials > 5) %>%
group_by(recognizer_age_numeric) %>%
multi_boot_standard(col = 'avg_correct')
## `summarise()` regrouping output by 'sessionId' (override with `.groups` argument)
by_each_recognizer <- d %>%
group_by(sessionId,recognizer_age_numeric) %>%
filter(producer_age != 'photo') %>%
summarize(avg_correct = mean(correct_or_not), num_trials = n()) %>%
filter(num_trials > 5)
## `summarise()` regrouping output by 'sessionId' (override with `.groups` argument)
base_size_chosen=12
ggplot(by_recognizer_age,aes(x=recognizer_age_numeric, y=mean, col = recognizer_age_numeric)) +
theme_few(base_size = base_size_chosen) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
geom_jitter(data = by_each_recognizer, aes(x=recognizer_age_numeric, y=avg_correct, size=num_trials), alpha=.05, width=.1, height=.01) +
scale_color_viridis(discrete = "FALSE") +
scale_size_area(max_size = 5) +
ylab('Proportion drawings recognized') +
ylim(0, 1) +
geom_hline(yintercept = .25, linetype = 'dashed', color='grey') +
geom_smooth(color = 'grey', span=10) +
xlab('Age of child recognizing (yrs)') +
theme(legend.position='none', aspect.ratio = 1) +
# labs(title='Drawing recognition by age') +
scale_x_continuous(breaks = seq(3,10,1))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 13 rows containing missing values (geom_point).
ggsave('figures/drawing_recognition_by_age_v2.pdf',width=3, height=3, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 13 rows containing missing values (geom_point).
base_size_chosen=12
ggplot(by_recognizer_age,aes(x=recognizer_age_numeric, y=mean, col = recognizer_age_numeric)) +
theme_few(base_size = base_size_chosen) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
geom_jitter(data = by_each_recognizer, aes(x=recognizer_age_numeric, y=avg_correct, size=num_trials), alpha=.1, width=.1, height=.01) +
scale_color_viridis(discrete = "FALSE") +
ylab('Proportion drawings recognized') +
ylim(0, 1) +
geom_hline(yintercept = .25, linetype = 'dashed', color='grey') +
geom_smooth(color = 'grey', span=10) +
xlab('Recognizer Age') +
theme(legend.position='none') +
# labs(title='Drawing recognition by age') +
scale_x_continuous(breaks = seq(3,10,1))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (geom_point).
ggsave('figures/drawing_recognition_by_age.pdf',width=3, height=4.8, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (geom_point).
kids_sketches_d <- d %>%
filter(producer_age != 'photo') %>% # don't look at photo trials
filter(recognizer_age != 'age2') %>% # or 2-yr-olds
mutate(recognizer_age_numeric = as.numeric(str_split_fixed(recognizer_age,'age',2)[,2])) %>%
mutate(recognizer_age_group = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels = c("3-4 yrs","5-6 yrs","7-8 yrs","9-10 yrs"))) %>%
mutate(recognizer_age_group_numeric = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels=c(3,5,7,9))) %>%
mutate(recognizer_age_group_numeric = as.numeric(recognizer_age_group_numeric))
kids_sketches_d <- kids_sketches_d %>%
mutate(sketch_path = as.factor(str_split_fixed(sketch_path,'/',2)[,2])) %>%
mutate(sketch_path = as.factor(str_split_fixed(sketch_path,'.png',2)[,1]))
kids_sketches_d %>%
group_by(recognizer_age_group,exp) %>%
summarize(num_subs = length(unique(sessionId))) %>%
kable()
## `summarise()` regrouping output by 'recognizer_age_group' (override with `.groups` argument)
| recognizer_age_group | exp | num_subs |
|---|---|---|
| 3-4 yrs | animalgame | 111 |
| 3-4 yrs | biganimalgame | 211 |
| 3-4 yrs | objectgame | 291 |
| 3-4 yrs | vehiclegame | 124 |
| 5-6 yrs | animalgame | 63 |
| 5-6 yrs | biganimalgame | 174 |
| 5-6 yrs | objectgame | 190 |
| 5-6 yrs | vehiclegame | 101 |
| 7-8 yrs | animalgame | 33 |
| 7-8 yrs | biganimalgame | 81 |
| 7-8 yrs | objectgame | 137 |
| 7-8 yrs | vehiclegame | 68 |
| 9-10 yrs | animalgame | 37 |
| 9-10 yrs | biganimalgame | 57 |
| 9-10 yrs | objectgame | 77 |
| 9-10 yrs | vehiclegame | 34 |
image_by_exp <- d %>%
distinct(exp, sketch_path) %>%
mutate(image_path_short = as.factor(str_split_fixed(sketch_path,'/',2)[,2]))
age_diff_ordered_drawings <- d %>%
filter(!producer_age == 'photo') %>% # not photos
mutate(coarse_recognizer_age_group = cut(recognizer_age_numeric, c(2.9, 6, 10.1), labels = c("younger" ,"older"))) %>%
mutate(image_path_short = as.factor(str_split_fixed(sketch_path,'/',2)[,2])) %>%
group_by(image_path_short, coarse_recognizer_age_group) %>%
summarize(avg_correct = mean(correct_or_not), participants = n()) %>%
summarize(age_diff = avg_correct[coarse_recognizer_age_group == 'older'] - avg_correct[coarse_recognizer_age_group == 'younger'], all_participants = sum(participants)) %>%
left_join(image_by_exp) %>%
mutate(fullPath = here::here('data/drawings_recoggames',exp,image_path_short)) %>%
mutate(newPath = here::here('data/drawings_recoggames/ordered',paste0(round(age_diff,2),'_',image_path_short)))
## `summarise()` regrouping output by 'image_path_short' (override with `.groups` argument)
## `summarise()` regrouping output by 'image_path_short' (override with `.groups` argument)
## Joining, by = "image_path_short"
# dir.create(here::here('data/drawings_recoggames/ordered'))
# file.copy(age_diff_ordered_drawings$fullPath, age_diff_ordered_drawings$newPath)
# age_diff_ordered_drawings <- age_diff_ordered_drawings%>%
# mutate(category = str_split_fixed(image_path_short,'_', 2)[,1])
#
# ggplot(age_diff_ordered_drawings, aes(x=category, y=age_diff, size=all_participants)) +
# geom_point(alpha=.1)
by_recognizer_filtered <- kids_sketches_d %>%
group_by(recognizer_age, recognizer_age_numeric,exp) %>%
multi_boot_standard(col = 'correct_or_not')
count_trials <- kids_sketches_d %>%
group_by(recognizer_age) %>%
summarize(count_trials = n())
## `summarise()` ungrouping output (override with `.groups` argument)
by_recognizer_filtered$recognizer_age <- factor(by_recognizer_filtered$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))
## Scale dots by number of trials in each bin to get a sense of variability
by_recognizer_filtered <- by_recognizer_filtered %>%
left_join(count_trials) %>%
mutate(scale = count_trials / 1000) %>%
group_by(exp)
## Joining, by = "recognizer_age"
ggplot(by_recognizer_filtered,aes(x=recognizer_age_numeric, y=mean, col = recognizer_age_numeric, size=scale)) +
theme_few(base_size=18) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
geom_smooth(alpha=.2, color='grey') +
scale_color_viridis(discrete = "FALSE") +
ylab('Proportion drawings recognized') +
scale_size_area(max_size=1.5) +
ylim(.25,.8) +
xlab('Recognizer Age') +
geom_hline(yintercept = .25, linetype="dashed") +
theme(axis.ticks.x = element_blank(), legend.position='none', aspect.ratio = 1) +
facet_grid(~exp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_pointrange).
# summarize avg correct by producer age
by_producer <- d %>%
filter(recognizer_age!= 'adult') %>%
group_by(producer_age) %>%
multi_boot_standard(col = 'correct_or_not')
ggplot(by_producer,aes(x=producer_age, y=mean, col = producer_age)) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
scale_color_viridis(discrete = "TRUE") +
ylab('Proportion recognized') +
theme_few()
both_category <- d %>%
group_by(producer_age,intended_category) %>%
multi_boot_standard(col = 'correct_or_not')
ggplot(both_category,aes(x=producer_age, y=mean, col=producer_age)) +
theme_few() +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
ylab('average correct') +
theme(axis.text.x = element_text(angle = 90, hjust = 1), aspect.ratio = 1) +
facet_wrap(~intended_category) +
scale_color_viridis(discrete=TRUE)
We expect that the distintiveness of each drawing will be a major factor in how well it is recognized, and, further, that older children will be more sensitive to the presence of these distinctive features.
c_vehiclegame <- read.csv("compiled_classifications/classification-outputs-vehiclegame_C_0.1_460.csv") %>%
as.tibble() %>%
select(-X.1, -X) %>%
mutate(denom = (airplane_prob + train_prob + boat_prob + car_prob) - target_label_prob) %>%
mutate(log_odds = log(target_label_prob / denom)) %>% ## compute log odd probability
mutate(exp='vehiclegame')
# now read it and join with other classifications
c_animalgame <- read.csv("compiled_classifications/classification-outputs-animalgame_C_0.1_560.csv") %>%
as.tibble() %>%
select(-X.1, -X) %>%
mutate(denom = (dog_prob + fish_prob + rabbit_prob + bird_prob) - target_label_prob) %>%
mutate(log_odds = log(target_label_prob / denom)) %>%
mutate(exp='animalgame')
#
c_biganimalgame <- read.csv("compiled_classifications/classification-outputs-biganimalgame_C_0.1_600.csv") %>%
as.tibble() %>%
select(-X.1, -X) %>%
mutate(denom = (bear_prob + sheep_prob + camel_prob + tiger_prob) - target_label_prob) %>%
mutate(log_odds = log(target_label_prob / denom)) %>%
mutate(exp='animalgame')
# now read it and join with other classifications
c <- read.csv("compiled_classifications/classification-outputs-objectgame_C_0.1_500.csv") %>%
as.tibble() %>%
select(-X.1, -X) %>%
mutate(denom = (bottle_prob + lamp_prob + hat_prob + cup_prob) - target_label_prob) %>%
mutate(log_odds = log(target_label_prob / denom)) %>%
mutate(exp='objectgame') %>%
full_join(c_vehiclegame) %>%
full_join(c_animalgame) %>%
full_join(c_biganimalgame) %>%
mutate(sketch_path = paste0(target_label,'_','sketch_age',age,'_cdm_',session_id)) %>%
mutate(sketch_path = as.factor(sketch_path)) %>%
rename(model_correct_or_not = correct_or_not)
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
ggplot(c, aes(x=target_label_prob, y=log_odds, col=model_correct_or_not)) +
geom_jitter(alpha=.1) +
facet_wrap(~target_label) +
theme_few()
num_bins=10
## compute bins for distinctiveness
sketch_by_distinctiveness <- c %>%
mutate(distinct_index = ntile(log_odds,num_bins)) %>% ## compute bins based on log odds over entire dataset
select(sketch_path, distinct_index, log_odds, model_correct_or_not)
## join distinctiveness bins for each sketch in dataset
all_joined <- kids_sketches_d %>%
left_join(sketch_by_distinctiveness)
## Joining, by = "sketch_path"
ggplot(all_joined, aes(x=distinct_index, y=log_odds)) +
geom_jitter(alpha=.03) +
facet_wrap(~intended_category) +
theme_few()
## Warning: Removed 370 rows containing missing values (geom_point).
distinct_by_item <- all_joined %>%
group_by(recognizer_age_group, intended_category, distinct_index) %>%
multi_boot_standard(col='correct_or_not') %>%
group_by(distinct_index)
ggplot(distinct_by_item, aes(x=distinct_index, y=mean, col=recognizer_age_group)) +
geom_pointrange(alpha=.2, aes(ymin = ci_lower, ymax = ci_upper)) +
geom_smooth(alpha=.1, span=10, method='lm') +
theme_few() +
scale_color_viridis(discrete = TRUE, begin=.2, end=.8) +
scale_x_continuous(breaks=seq(1,10,2)) +
xlab('Distinctiveness Index') +
ylab('Proportion recognized') +
theme(legend.position='none') +
facet_wrap(~intended_category, nrow=2)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_pointrange).
## Warning: Removed 2 rows containing missing values (geom_segment).
# distinct_by_age <- all_joined %>%
# filter(!is.na(distinct_index)) %>% # not every item had classificaiotns because we needed to balance
# group_by(recognizer_age_numeric, distinct_index) %>%
# multi_boot_standard(col='correct_or_not') %>%
# group_by(distinct_index)
#
# ggplot(distinct_by_age, aes(x=recognizer_age_numeric, y=mean, col=distinct_index)) +
# geom_pointrange(position = position_dodge(width=.1), aes(ymin = ci_lower, ymax = ci_upper)) +
# geom_smooth(alpha=.2, span=10, method='lm', aes(group=distinct_index)) +
# facet_wrap(~distinct_index, nrow=2) +
# theme_few(base_size=18) +
# theme(legend.position="right") +
# geom_hline(yintercept=.25, linetype='dashed',color='grey') +
# xlab('Recognizer Age') +
# ylab('Proportion drawings recognized') +
# ylim(0,1)
## Panel B in ms figure
distinct_by_age <- all_joined %>%
group_by(recognizer_age_numeric, distinct_index) %>%
multi_boot_standard(col='correct_or_not') %>%
group_by(distinct_index) %>%
mutate(recognizer_age_name = paste0(as.character(recognizer_age_numeric), '-year-olds'))
ggplot(distinct_by_age, aes(x=distinct_index, y=mean, col=as.factor(recognizer_age_numeric))) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), alpha=.8, size=.2) +
geom_smooth(alpha=.2, span=4, method='lm', size=.25) +
facet_wrap(~recognizer_age_numeric, nrow=2) +
theme_few(base_size = 10) +
xlab('Classifier evidence') +
ylab('Proportion drawings recognized') +
theme(legend.position='none') +
scale_color_viridis(discrete=TRUE) +
scale_x_continuous(
breaks=c(2,9),
labels=c(' Low ',' High ')
) +
theme(axis.ticks.x=element_blank(), aspect.ratio=1) +
geom_hline(yintercept=.25, linetype='dashed',color='grey') +
ylim(0,1)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_pointrange).
ggsave('figures/DistinctByAge.pdf', height=3, units='in')
## Saving 7 x 3 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_pointrange).
# sketch_wise <- all_joined %>%
# ungroup() %>%
# group_by(sketch_path, recognizer_age_numeric) %>%
# dplyr::summarize(prop_correct = mean(correct_or_not, na.rm=TRUE), log_odds = mean(log_odds))
#
# ggplot(sketch_wise, aes(x=log_odds, y=prop_correct, col=recognizer_age_numeric)) +
# geom_point(alpha=.2) +
# geom_smooth(alpha=.2, span=4, method='lm', size=.25) +
# theme_few(base_size = 10) +
# facet_wrap(~recognizer_age_numeric) +
# ylab('Proportion drawings recognized') +
# theme(legend.position='none') +
# # scale_color_viridis(discrete=TRUE) +
# geom_hline(yintercept=.25, linetype='dashed',color='grey') +
# ylim(0,1)
model_glmer <- glmer(correct_or_not ~ scale(log_odds)*scale(recognizer_age_numeric) + (scale(log_odds)|intended_category) + (1|sessionId), data = all_joined, family='binomial')
out = summary(model_glmer)
round(out$coefficients,3)
## Estimate Std. Error z value
## (Intercept) 0.050 0.121 0.413
## scale(log_odds) 0.477 0.046 10.406
## scale(recognizer_age_numeric) 0.317 0.019 16.777
## scale(log_odds):scale(recognizer_age_numeric) 0.062 0.014 4.246
## Pr(>|z|)
## (Intercept) 0.679
## scale(log_odds) 0.000
## scale(recognizer_age_numeric) 0.000
## scale(log_odds):scale(recognizer_age_numeric) 0.000
xtable::xtable(summary(out)$coef, digits=3, caption = "Model coefficients of a GLMM predicting visual recognition performance as a function of recognizer age and visual 'distinctiveness' (i.e. log-odds probability of selecting the correct label in logistic regression based on visual features, see Methods).")
## % latex table generated in R 3.6.1 by xtable 1.8-4 package
## % Wed Nov 18 13:09:14 2020
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\
## \hline
## (Intercept) & 0.050 & 0.121 & 0.413 & 0.679 \\
## scale(log\_odds) & 0.477 & 0.046 & 10.406 & 0.000 \\
## scale(recognizer\_age\_numeric) & 0.317 & 0.019 & 16.777 & 0.000 \\
## scale(log\_odds):scale(recognizer\_age\_numeric) & 0.062 & 0.014 & 4.246 & 0.000 \\
## \hline
## \end{tabular}
## \caption{Model coefficients of a GLMM predicting visual recognition performance as a function of recognizer age and visual 'distinctiveness' (i.e. log-odds probability of selecting the correct label in logistic regression based on visual features, see Methods).}
## \end{table}
low_accuracy_drawings <- cor_by_trial_type %>%
filter(photo_or_not==FALSE) %>%
filter(avg_correct<.5)
low_accuracy_photo <- cor_by_trial_type %>%
filter(photo_or_not==TRUE) %>%
filter(avg_correct<1)
# filter low acc subjects (not in prereg but in attempt to equalize age groups even more)
high_acc_only <- all_joined %>%
filter(!sessionId %in% low_accuracy_drawings$sessionId) %>%
filter(!sessionId %in% low_accuracy_photo$sessionId)
model_high_acc <- glmer(correct_or_not ~ scale(log_odds)*scale(recognizer_age_numeric) + (scale(log_odds)|intended_category) + (1|sessionId), data = high_acc_only, family='binomial')
out_high_acc = summary(model_high_acc)
round(out_high_acc$coefficients,3)
## Estimate Std. Error z value
## (Intercept) 0.668 0.099 6.717
## scale(log_odds) 0.518 0.051 10.059
## scale(recognizer_age_numeric) 0.141 0.023 6.190
## scale(log_odds):scale(recognizer_age_numeric) 0.056 0.023 2.464
## Pr(>|z|)
## (Intercept) 0.000
## scale(log_odds) 0.000
## scale(recognizer_age_numeric) 0.000
## scale(log_odds):scale(recognizer_age_numeric) 0.014
xtable::xtable(summary(out_high_acc)$coef, digits=3, caption = "Model coefficients of a GLMM predicting visual recognition performance, excluding any participant who scored less than 95% on photo trials, or 50% on drawing trials).")
## % latex table generated in R 3.6.1 by xtable 1.8-4 package
## % Wed Nov 18 13:09:18 2020
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\
## \hline
## (Intercept) & 0.668 & 0.099 & 6.717 & 0.000 \\
## scale(log\_odds) & 0.518 & 0.051 & 10.059 & 0.000 \\
## scale(recognizer\_age\_numeric) & 0.141 & 0.023 & 6.190 & 0.000 \\
## scale(log\_odds):scale(recognizer\_age\_numeric) & 0.056 & 0.023 & 2.464 & 0.014 \\
## \hline
## \end{tabular}
## \caption{Model coefficients of a GLMM predicting visual recognition performance, excluding any participant who scored less than 95% on photo trials, or 50% on drawing trials).}
## \end{table}
by_recognizer_age <- high_acc_only %>%
filter(producer_age != 'photo') %>%
group_by(sessionId, recognizer_age_numeric) %>%
summarize(avg_correct = mean(correct_or_not), num_trials = n()) %>%
filter(num_trials > 5) %>%
group_by(recognizer_age_numeric) %>%
multi_boot_standard(col = 'avg_correct')
## `summarise()` regrouping output by 'sessionId' (override with `.groups` argument)
ggplot(by_recognizer_age,aes(x=recognizer_age_numeric, y=mean, col = recognizer_age_numeric)) +
theme_few(base_size = base_size_chosen) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
scale_color_viridis(discrete = "FALSE") +
ylab('Proportion drawings recognized') +
ylim(0, 1) +
geom_hline(yintercept = .25, linetype = 'dashed', color='grey') +
geom_smooth(color = 'grey', span=10) +
xlab('Recognizer Age') +
theme(legend.position='none') +
# labs(title='Drawing recognition by age') +
scale_x_continuous(breaks = seq(3,10,1))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggsave('figures/drawing_recognition_by_age_high_acc_only.pdf',width=3, height=4.8, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(all_joined, aes(x=log_odds, y=as.numeric(correct_or_not), col=recognizer_age_numeric)) +
geom_jitter(height=.2, width=.01, alpha=.1) +
geom_smooth(alpha=.1, method='lm') +
theme_few(base_size = 14) +
scale_y_continuous(breaks=c(0,1)) +
xlab('Log odds probability (VGG-19 classifications)') +
ylab('Proportion recognized') +
theme(legend.position='none') +
scale_color_viridis() +
facet_grid(~recognizer_age_numeric)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 370 rows containing non-finite values (stat_smooth).
## Warning: Removed 370 rows containing missing values (geom_point).
kid_v_model_by_group <- all_joined %>%
filter(recognizer_age_numeric>7) %>%
group_by(producer_age, intended_category) %>%
dplyr::summarize(model_correct = mean(model_correct_or_not, na.rm=TRUE), kid_correct = mean(correct_or_not, na.rm=TRUE))
## `summarise()` regrouping output by 'producer_age' (override with `.groups` argument)
ggplot(kid_v_model_by_group, aes(x=model_correct, y=kid_correct)) +
geom_point(alpha=.8) +
geom_smooth(alpha=.1, method='lm') +
theme_few(base_size = 14) +
# scale_y_continuous(breaks=c(0,1)) +
xlab('Model proportion recognied') +
ylab('Older kid roportion recognized') +
theme(legend.position='none')
## `geom_smooth()` using formula 'y ~ x'
kid_v_model <- all_joined %>%
ungroup() %>%
filter(recognizer_age_numeric>7) %>%
group_by(sketch_path) %>%
dplyr::summarize(model_correct = mean(model_correct_or_not, na.rm=TRUE), kid_correct = mean(correct_or_not, na.rm=TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
ggplot(kid_v_model, aes(x=model_correct, y=kid_correct)) +
geom_jitter(width=.1, height=.1, alpha=.2) +
geom_smooth(alpha=.1, method='lm') +
theme_few(base_size = 14) +
ggtitle('Recognition by sketch') +
# scale_y_continuous(breaks=c(0,1)) +
xlab('Model recognized') +
ylab('Older kid proportion recognized') +
theme(legend.position='none')
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (stat_smooth).
## Warning: Removed 33 rows containing missing values (geom_point).